home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Personal Computer World 2009 February
/
PCWFEB09.iso
/
Software
/
Resources
/
Chat & Communication
/
Digsby build 37
/
digsby_setup.exe
/
lib
/
lxml
/
cssselect.pyo
(
.txt
)
< prev
next >
Wrap
Python Compiled Bytecode
|
2008-10-13
|
27KB
|
943 lines
# Source Generated with Decompyle++
# File: in.pyo (Python 2.5)
import re
from lxml import etree
# Names exported by ``from <module> import *``.
__all__ = ['SelectorSyntaxError', 'ExpressionError', 'CSSSelector']
# Base type used to recognise selector strings: ``basestring`` where the
# interpreter defines it, plain ``str`` otherwise.
try:
    basestring
except NameError:
    _basestring = str
else:
    _basestring = basestring
class SelectorSyntaxError(SyntaxError):
    """Raised by the tokenizer and parser for a malformed CSS selector."""
class ExpressionError(RuntimeError):
    """Raised when a parsed selector cannot be turned into XPath.

    Used for unsupported or unknown pseudo-classes and unknown combinators.
    """
class CSSSelector(etree.XPath):
    """An ``etree.XPath`` built from a CSS selector.

    The selector is translated by ``css_to_xpath`` and the original CSS
    text is kept on the ``css`` attribute.
    """

    def __init__(self, css):
        etree.XPath.__init__(self, css_to_xpath(css))
        self.css = css

    def __repr__(self):
        # Hex form of the object id with the leading "0x" removed.
        ident = hex(abs(id(self)))[2:]
        return '<%s %s for %r>' % (self.__class__.__name__, ident, self.css)
# Text type that tokens derive from: ``unicode`` where the interpreter
# defines it, plain ``str`` otherwise.
try:
    unicode
except NameError:
    _unicode = str
else:
    _unicode = unicode
class _UniToken(_unicode):
    """Text value that also records its offset (``pos``) in the selector."""

    def __new__(cls, contents, pos):
        token = _unicode.__new__(cls, contents)
        token.pos = pos
        return token

    def __repr__(self):
        text = _unicode.__repr__(self)
        return '%s(%s, %r)' % (self.__class__.__name__, text, self.pos)
class Symbol(_UniToken):
    """Token the tokenizer yields for names and ``an+b`` style counts."""
class String(_UniToken):
    """Token the tokenizer yields for the contents of a quoted string."""
class Token(_UniToken):
    """Token the tokenizer yields for punctuation and operators."""
class Class(object):
    """Selector node for ``selector.class_name``."""

    def __init__(self, selector, class_name):
        self.selector, self.class_name = selector, class_name

    def __repr__(self):
        parts = (self.__class__.__name__, self.selector, self.class_name)
        return '%s[%r.%s]' % parts

    def xpath(self):
        """Return the selector's XPath with a whole-word @class test added."""
        result = self.selector.xpath()
        # Pad both sides with spaces so only complete class names match.
        padded_name = xpath_repr(' ' + self.class_name + ' ')
        test = "contains(concat(' ', normalize-space(@class), ' '), %s)"
        result.add_condition(test % padded_name)
        return result
class Function(object):
    """Selector node for a functional pseudo-class: ``selector:name(expr)``.

    ``xpath()`` dispatches to the ``_xpath_<name>`` method (dashes in the
    name become underscores), which adds the matching condition to the
    XPath expression object produced by ``selector``.
    """

    # Names that xpath() rejects as "not supported" instead of "unknown".
    unsupported = [
        'target',
        'lang',
        'enabled',
        'disabled']

    def __init__(self, selector, type, name, expr):
        self.selector = selector
        self.type = type
        self.name = name
        self.expr = expr

    def __repr__(self):
        return '%s[%r%s%s(%r)]' % (self.__class__.__name__, self.selector, self.type, self.name, self.expr)

    def xpath(self):
        """Translate this node; raise ExpressionError for unusable names."""
        sel_path = self.selector.xpath()
        if self.name in self.unsupported:
            raise ExpressionError('The psuedo-class %r is not supported' % self.name)
        method = '_xpath_' + self.name.replace('-', '_')
        if not hasattr(self, method):
            raise ExpressionError('The psuedo-class %r is unknown' % self.name)
        method = getattr(self, method)
        return method(sel_path, self.expr)

    def _xpath_nth_child(self, xpath, expr, last = False, add_name_test = True):
        """Add a position test for the series ``a*n + b`` parsed from expr.

        last selects the ``*-last-*`` variants (counting from the end);
        add_name_test moves the element name into a ``name()`` condition
        before the ``*/`` prefix is added.
        """
        (a, b) = parse_series(expr)
        if not a and not b and not last:
            # The series (0, 0) selects nothing.
            xpath.add_condition('false() and position() = 0')
            return xpath
        if add_name_test:
            xpath.add_name_test()
        xpath.add_star_prefix()
        if a == 0:
            # No step: a single fixed position.
            if last:
                b = 'last() - %s' % b
            xpath.add_condition('position() = %s' % b)
            return xpath
        if last:
            # NOTE(review): the sign flip for the *-last-* variants is kept
            # exactly as found; it has not been checked against the
            # Selectors specification.
            a = -a
            b = -b
        if b > 0:
            b_neg = str(-b)
        else:
            b_neg = '+%s' % -b
        if a != 1:
            expr = [
                '(position() %s) mod %s = 0' % (b_neg, a)]
        else:
            expr = []
        if b >= 0:
            expr.append('position() >= %s' % b)
        elif b < 0 and last:
            expr.append('position() < (last() %s)' % b)
        expr = ' and '.join(expr)
        if expr:
            xpath.add_condition(expr)
        return xpath

    def _xpath_nth_last_child(self, xpath, expr):
        return self._xpath_nth_child(xpath, expr, last = True)

    def _xpath_nth_of_type(self, xpath, expr):
        if xpath.element == '*':
            raise NotImplementedError('*:nth-of-type() is not implemented')
        return self._xpath_nth_child(xpath, expr, add_name_test = False)

    def _xpath_nth_last_of_type(self, xpath, expr):
        # Same guard as _xpath_nth_of_type: without an element name the
        # position test would count all children, not children of one type.
        if xpath.element == '*':
            raise NotImplementedError('*:nth-last-of-type() is not implemented')
        return self._xpath_nth_child(xpath, expr, last = True, add_name_test = False)

    def _xpath_contains(self, xpath, expr):
        """Add a lower-cased substring test on the node's string value."""
        if isinstance(expr, Element):
            # A bare word argument was parsed as an element selector.
            expr = expr._format_element()
        xpath.add_condition('contains(css:lower-case(string(.)), %s)' % xpath_repr(expr.lower()))
        return xpath

    def _xpath_not(self, xpath, expr):
        """Negate the condition of the argument selector's XPath."""
        expr = expr.xpath()
        cond = expr.condition
        xpath.add_condition('not(%s)' % cond)
        return xpath
def _make_lower_case(context, s):
    """Return ``s`` lower-cased; ``context`` is accepted but not used."""
    lowered = s.lower()
    return lowered


# Expose the helper to XPath expressions as ``css:lower-case``.
ns = etree.FunctionNamespace('http://codespeak.net/lxml/css/')
ns.prefix = 'css'
ns['lower-case'] = _make_lower_case
class Pseudo(object):
    """Selector node for a non-functional pseudo-class: ``element:ident``.

    ``xpath()`` dispatches to the ``_xpath_<ident>`` method (dashes in the
    identifier become underscores).
    """

    # Identifiers that xpath() rejects as "unsupported" instead of "unknown".
    unsupported = [
        'indeterminate', 'first-line', 'first-letter', 'selection',
        'before', 'after', 'link', 'visited', 'active', 'focus', 'hover']

    def __init__(self, element, type, ident):
        self.element, self.type, self.ident = element, type, ident

    def __repr__(self):
        parts = (self.__class__.__name__, self.element, self.type, self.ident)
        return '%s[%r%s%s]' % parts

    def xpath(self):
        """Translate this node; raise ExpressionError for unusable idents."""
        el_xpath = self.element.xpath()
        ident = self.ident
        if ident in self.unsupported:
            raise ExpressionError('The psuedo-class %r is unsupported' % ident)
        handler_name = '_xpath_' + ident.replace('-', '_')
        if hasattr(self, handler_name):
            handler = getattr(self, handler_name)
            return handler(el_xpath)
        raise ExpressionError('The psuedo-class %r is unknown' % ident)

    def _xpath_checked(self, xpath):
        xpath.add_condition(
            "(@selected or @checked) and (name(.) = 'input' or name(.) = 'option')")
        return xpath

    def _xpath_root(self, xpath):
        raise NotImplementedError

    def _xpath_first_child(self, xpath):
        xpath.add_star_prefix()
        xpath.add_name_test()
        xpath.add_condition('position() = 1')
        return xpath

    def _xpath_last_child(self, xpath):
        xpath.add_star_prefix()
        xpath.add_name_test()
        xpath.add_condition('position() = last()')
        return xpath

    def _xpath_first_of_type(self, xpath):
        # The *-of-type handlers need a concrete element name.
        if xpath.element == '*':
            raise NotImplementedError('*:first-of-type is not implemented')
        xpath.add_star_prefix()
        xpath.add_condition('position() = 1')
        return xpath

    def _xpath_last_of_type(self, xpath):
        if xpath.element == '*':
            raise NotImplementedError('*:last-of-type is not implemented')
        xpath.add_star_prefix()
        xpath.add_condition('position() = last()')
        return xpath

    def _xpath_only_child(self, xpath):
        xpath.add_name_test()
        xpath.add_star_prefix()
        xpath.add_condition('last() = 1')
        return xpath

    def _xpath_only_of_type(self, xpath):
        if xpath.element == '*':
            raise NotImplementedError('*:only-of-type is not implemented')
        xpath.add_condition('last() = 1')
        return xpath

    def _xpath_empty(self, xpath):
        # No child elements and no non-whitespace text.
        xpath.add_condition('not(*) and not(normalize-space())')
        return xpath
class Attrib(object):
    """Selector node for an attribute test: ``selector[namespace|attrib operator value]``.

    The operator ``'exists'`` stands for a bare ``[attrib]`` test.
    """

    def __init__(self, selector, namespace, attrib, operator, value):
        self.selector, self.namespace, self.attrib = selector, namespace, attrib
        self.operator, self.value = operator, value

    def __repr__(self):
        cls_name = self.__class__.__name__
        if self.operator != 'exists':
            return '%s[%r[%s %s %r]]' % (cls_name, self.selector, self._format_attrib(), self.operator, self.value)
        return '%s[%r[%s]]' % (cls_name, self.selector, self._format_attrib())

    def _format_attrib(self):
        """Return the attribute name in CSS notation (``ns|name``)."""
        if self.namespace != '*':
            return '%s|%s' % (self.namespace, self.attrib)
        return self.attrib

    def _xpath_attrib(self):
        """Return the attribute name in XPath notation (``@ns:name``)."""
        if self.namespace != '*':
            return '@%s:%s' % (self.namespace, self.attrib)
        return '@' + self.attrib

    def xpath(self):
        """Return the selector's XPath with the attribute condition added.

        An operator outside the handled set adds no condition.
        """
        path = self.selector.xpath()
        attrib = self._xpath_attrib()
        op, value = self.operator, self.value
        condition = None
        if op == 'exists':
            condition = attrib
        elif op == '=':
            condition = '%s = %s' % (attrib, xpath_repr(value))
        elif op == '!=':
            if value:
                # A missing attribute also counts as "not equal".
                condition = 'not(%s) or %s != %s' % (attrib, attrib, xpath_repr(value))
            else:
                condition = '%s != %s' % (attrib, xpath_repr(value))
        elif op == '~=':
            condition = "contains(concat(' ', normalize-space(%s), ' '), %s)" % (attrib, xpath_repr(' ' + value + ' '))
        elif op == '|=':
            condition = '%s = %s or starts-with(%s, %s)' % (attrib, xpath_repr(value), attrib, xpath_repr(value + '-'))
        elif op == '^=':
            condition = 'starts-with(%s, %s)' % (attrib, xpath_repr(value))
        elif op == '$=':
            # Compare the trailing len(value) characters of the attribute.
            condition = 'substring(%s, string-length(%s)-%s) = %s' % (attrib, attrib, len(value) - 1, xpath_repr(value))
        elif op == '*=':
            condition = 'contains(%s, %s)' % (attrib, xpath_repr(value))
        if condition is not None:
            path.add_condition(condition)
        return path
class Element(object):
    """Selector node for an element name: ``namespace|element``.

    A namespace of ``'*'`` means no namespace was given.
    """

    def __init__(self, namespace, element):
        self.namespace, self.element = namespace, element

    def __repr__(self):
        return '%s[%s]' % (self.__class__.__name__, self._format_element())

    def _format_element(self):
        """Return the name in CSS notation, with ``ns|`` when namespaced."""
        if self.namespace != '*':
            return '%s|%s' % (self.namespace, self.element)
        return self.element

    def xpath(self):
        """Return an XPathExpr for this element name.

        Names without a namespace are lower-cased; namespaced names are
        emitted as ``ns:name`` unchanged.
        """
        if self.namespace != '*':
            name = '%s:%s' % (self.namespace, self.element)
        else:
            name = self.element.lower()
        return XPathExpr(element = name)
class Hash(object):
    """Selector node for an id test: ``selector#id``."""

    def __init__(self, selector, id):
        self.selector, self.id = selector, id

    def __repr__(self):
        parts = (self.__class__.__name__, self.selector, self.id)
        return '%s[%r#%s]' % parts

    def xpath(self):
        """Return the selector's XPath with an ``@id`` equality test added."""
        result = self.selector.xpath()
        quoted_id = xpath_repr(self.id)
        result.add_condition('@id = %s' % quoted_id)
        return result
class Or(object):
    """Selector node for a group of alternative selectors."""

    def __init__(self, items):
        self.items = items

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self.items)

    def xpath(self):
        """Translate every alternative and wrap them in an XPathExprOr."""
        translated = []
        for alternative in self.items:
            translated.append(alternative.xpath())
        return XPathExprOr(translated)
class CombinedSelector(object):
    """Selector node for two selectors joined by a combinator.

    The combinator is one of the keys of ``_method_mapping``; a single
    space stands for the descendant combinator.
    """

    # Combinator -> suffix of the ``_xpath_*`` method that handles it.
    _method_mapping = {
        ' ': 'descendant',
        '>': 'child',
        '+': 'direct_adjacent',
        '~': 'indirect_adjacent' }

    def __init__(self, selector, combinator, subselector):
        self.selector, self.combinator, self.subselector = (
            selector, combinator, subselector)

    def __repr__(self):
        shown = self.combinator
        if shown == ' ':
            shown = '<followed>'
        return '%s[%r %s %r]' % (self.__class__.__name__, self.selector, shown, self.subselector)

    def xpath(self):
        """Translate both sides; raise ExpressionError on an unknown combinator."""
        handler_suffix = self._method_mapping.get(self.combinator)
        if handler_suffix is None:
            raise ExpressionError('Unknown combinator: %r' % self.combinator)
        handler = getattr(self, '_xpath_' + handler_suffix)
        left = self.selector.xpath()
        return handler(left, self.subselector)

    def _xpath_descendant(self, xpath, sub):
        xpath.join('/descendant::', sub.xpath())
        return xpath

    def _xpath_child(self, xpath, sub):
        xpath.join('/', sub.xpath())
        return xpath

    def _xpath_direct_adjacent(self, xpath, sub):
        # Only the first following sibling may match, so the name is moved
        # into a condition and the position is pinned to 1.
        xpath.join('/following-sibling::', sub.xpath())
        xpath.add_name_test()
        xpath.add_condition('position() = 1')
        return xpath

    def _xpath_indirect_adjacent(self, xpath, sub):
        xpath.join('/following-sibling::', sub.xpath())
        return xpath
# Fast-path patterns for the simplest selectors: a bare element name,
# ``element#id`` (element optional) and ``element.class`` (element optional).
_el_re = re.compile(r'^\w+\s*$')
_id_re = re.compile(r'^(\w*)#(\w+)\s*$')
_class_re = re.compile(r'^(\w*)\.(\w+)\s*$')
def css_to_xpath(css_expr, prefix = 'descendant-or-self::'):
    """Translate a CSS selector into an XPath expression string.

    css_expr is either a selector string or an object with an ``xpath()``
    method (an already parsed selector).  Strings that are only an
    element name, ``element#id`` or ``element.class`` (element optional in
    the last two) are translated directly by regular expression; every
    other string goes through ``parse``.

    prefix is placed in front of the expression; a false value adds
    nothing.
    """
    if isinstance(css_expr, _basestring):
        # A false prefix must not be rendered as the text "None".
        lead = prefix or ''
        match = _el_re.search(css_expr)
        if match is not None:
            return '%s%s' % (lead, match.group(0).strip())
        match = _id_re.search(css_expr)
        if match is not None:
            # An empty element part (plain "#id") matches any element.
            return "%s%s[@id = '%s']" % (lead, match.group(1) or '*', match.group(2))
        match = _class_re.search(css_expr)
        if match is not None:
            return "%s%s[contains(concat(' ', normalize-space(@class), ' '), ' %s ')]" % (lead, match.group(1) or '*', match.group(2))
        css_expr = parse(css_expr)
    expr = css_expr.xpath()
    if prefix:
        expr.add_prefix(prefix)
    return str(expr)
class XPathExpr(object):
    """Mutable builder for one XPath step sequence.

    The string form is ``prefix + path + element`` followed by
    ``[condition]`` when a condition is set.
    """

    def __init__(self, prefix = None, path = None, element = '*', condition = None, star_prefix = False):
        self.prefix = prefix
        self.path = path
        self.element = element
        self.condition = condition
        self.star_prefix = star_prefix

    def __str__(self):
        path = ''
        if self.prefix is not None:
            path += str(self.prefix)
        if self.path is not None:
            path += str(self.path)
        path += str(self.element)
        if self.condition:
            path += '[%s]' % self.condition
        return path

    def __repr__(self):
        return '%s[%s]' % (self.__class__.__name__, self)

    def add_condition(self, condition):
        """AND ``condition`` onto the existing condition, if any."""
        if self.condition:
            self.condition = '%s and (%s)' % (self.condition, condition)
        else:
            self.condition = condition

    def add_path(self, part):
        # NOTE(review): despite the name this replaces ``element``; kept
        # as found.
        self.element = part

    def add_prefix(self, prefix):
        """Put ``prefix`` in front of the current prefix."""
        if self.prefix:
            self.prefix = prefix + self.prefix
        else:
            self.prefix = prefix

    def add_name_test(self):
        """Move a concrete element name into a ``name()`` condition."""
        if self.element == '*':
            return None
        self.add_condition('name() = %s' % xpath_repr(self.element))
        self.element = '*'

    def add_star_prefix(self):
        """Append ``*/`` to the path and remember that it was added."""
        if self.path:
            self.path += '*/'
        else:
            self.path = '*/'
        self.star_prefix = True

    def join(self, combiner, other):
        """Extend this expression with ``other`` through ``combiner``.

        The current expression text plus ``combiner`` becomes the new
        prefix; path, element and condition are taken from ``other``.
        """
        prefix = str(self)
        prefix += combiner
        # Either part of ``other`` may be None; treat that as empty text.
        path = (other.prefix or '') + (other.path or '')
        if other.star_prefix and path == '*/':
            # The combiner already supplies the axis step, so a lone star
            # prefix on ``other`` is dropped.
            path = ''
        self.prefix = prefix
        self.path = path
        self.element = other.element
        self.condition = other.condition
class XPathExprOr(XPathExpr):
    """A union of XPath expressions, one per selector in a group.

    Only ``items`` and ``prefix`` are set; the other XPathExpr attributes
    are not initialised.
    """

    def __init__(self, items, prefix = None):
        # The decompiled source looped over ``items`` with an empty body;
        # that no-op loop is dropped.
        self.items = items
        self.prefix = prefix

    def __str__(self):
        # The prefix applies to every alternative; the alternatives are
        # combined with the XPath union operator.
        prefix = self.prefix or ''
        return ' | '.join([ prefix + str(i) for i in self.items ])
def xpath_repr(s):
    """Return ``s`` as an XPath 1.0 string literal.

    Objects with a ``_format_element`` method (element nodes produced by
    the parser) are rendered through it first; everything else goes
    through ``str``.

    XPath literals have no escape sequences, so the text is emitted
    verbatim inside whichever quote character it does not contain; text
    containing both kinds of quote is assembled with ``concat()``.
    """
    formatter = getattr(s, '_format_element', None)
    if formatter is not None:
        s = formatter()
    text = str(s)
    if "'" not in text:
        return "'%s'" % text
    if '"' not in text:
        return '"%s"' % text
    # Both quote kinds occur: split on the single quotes and put each one
    # back as its own double-quoted literal.
    pieces = [ "'%s'" % piece for piece in text.split("'") ]
    return 'concat(%s)' % ', "\'", '.join(pieces)
def parse(string):
    """Parse a CSS selector string into a tree of selector nodes.

    A SelectorSyntaxError raised while parsing is re-raised with its
    message extended by the tokens consumed so far and the tokens still
    pending.
    """
    # Imported here because the module does not import sys at top level.
    import sys
    stream = TokenStream(tokenize(string))
    stream.source = string
    try:
        return parse_selector_group(stream)
    except SelectorSyntaxError:
        e = sys.exc_info()[1]
        e.args = tuple([
            '%s at %s -> %s' % (e, stream.used, list(stream))])
        raise
def parse_selector_group(stream):
    """Parse one or more comma-separated selectors from ``stream``.

    Returns the single selector node when there is only one, otherwise
    an ``Or`` node holding all of them.
    """
    result = []
    while True:
        result.append(parse_selector(stream))
        if stream.peek() == ',':
            # Consume the comma and parse the next alternative.
            stream.next()
        else:
            break
    if len(result) == 1:
        return result[0]
    else:
        return Or(result)
def parse_selector(stream):
    """Parse simple selectors joined by combinators.

    Stops, without consuming it, at a ``,`` or at the end of the stream.
    When two simple selectors follow each other without an explicit
    ``+``, ``>`` or ``~`` the descendant combinator ``' '`` is used.
    """
    result = parse_simple_selector(stream)
    while True:
        peek = stream.peek()
        if peek == ',' or peek is None:
            return result
        elif peek in ('+', '>', '~'):
            combinator = stream.next()
        else:
            combinator = ' '
        next_selector = parse_simple_selector(stream)
        result = CombinedSelector(result, combinator, next_selector)
def parse_simple_selector(stream):
    """Parse one simple selector: an optional element name plus qualifiers.

    Qualifiers are ``#id``, ``.class``, ``[attribute]`` and ``:pseudo`` /
    ``::pseudo`` (with an optional parenthesised argument), wrapped
    around the element node in the order they appear.  A second ``#``
    ends the selector.

    Raises SelectorSyntaxError when a required token is missing or has
    the wrong kind.
    """
    peek = stream.peek()
    if peek != '*' and not isinstance(peek, Symbol):
        # No element name given: match any element.
        element = namespace = '*'
    else:
        token = stream.next()
        if token != '*' and not isinstance(token, Symbol):
            raise SelectorSyntaxError('Expected symbol, got %r' % token)
        if stream.peek() == '|':
            namespace = token
            stream.next()
            element = stream.next()
            # Validate the name after "|" itself, not the namespace token
            # that was already checked above.
            if element != '*' and not isinstance(element, Symbol):
                raise SelectorSyntaxError('Expected symbol, got %r' % element)
        else:
            namespace = '*'
            element = token
    result = Element(namespace, element)
    has_hash = False
    while True:
        peek = stream.peek()
        if peek == '#':
            if has_hash:
                # Only one id per simple selector.
                break
            stream.next()
            result = Hash(result, stream.next())
            has_hash = True
        elif peek == '.':
            stream.next()
            result = Class(result, stream.next())
        elif peek == '[':
            stream.next()
            result = parse_attrib(result, stream)
            closing = stream.next()
            if not closing == ']':
                raise SelectorSyntaxError('] expected, got %r' % closing)
        elif peek == ':' or peek == '::':
            pseudo_type = stream.next()
            ident = stream.next()
            if not isinstance(ident, Symbol):
                raise SelectorSyntaxError('Expected symbol, got %r' % ident)
            if stream.peek() == '(':
                stream.next()
                peek = stream.peek()
                if isinstance(peek, String):
                    selector = stream.next()
                elif isinstance(peek, Symbol) and is_int(peek):
                    selector = int(stream.next())
                else:
                    # Anything else is parsed as a nested simple selector.
                    selector = parse_simple_selector(stream)
                closing = stream.next()
                if not closing == ')':
                    raise SelectorSyntaxError('Expected ), got %r and %r' % (closing, selector))
                result = Function(result, pseudo_type, ident, selector)
            else:
                result = Pseudo(result, pseudo_type, ident)
        else:
            if peek == ' ':
                # Explicit whitespace token: consume it and stop here.
                stream.next()
            break
    return result
def is_int(v):
    """Return True when ``int(v)`` succeeds, False when it raises ValueError."""
    try:
        int(v)
    except ValueError:
        return False
    else:
        return True
def parse_attrib(selector, stream):
    """Parse the inside of ``[...]`` and return an Attrib around ``selector``.

    The closing ``]`` is left in the stream.  Raises SelectorSyntaxError
    for an unknown operator or a value that is neither Symbol nor String.
    """
    name = stream.next()
    namespace = '*'
    if stream.peek() == '|':
        # "ns|name": the first token was the namespace.
        namespace = name
        stream.next()
        name = stream.next()
    if stream.peek() == ']':
        return Attrib(selector, namespace, name, 'exists', None)
    operator = stream.next()
    known_operators = ('^=', '$=', '*=', '=', '~=', '|=', '!=')
    if operator not in known_operators:
        raise SelectorSyntaxError('Operator expected, got %r' % operator)
    value = stream.next()
    if isinstance(value, (Symbol, String)):
        return Attrib(selector, namespace, name, operator, value)
    raise SelectorSyntaxError('Expected string or symbol, got %r' % value)
def parse_series(s):
    """Return ``(a, b)`` for an ``an+b`` style argument.

    Accepts an Element (its formatted name is used), an int, the
    keywords ``odd`` / ``even`` / ``n``, a plain number string, or text of
    the form ``<a>n<b>``.  An empty value or ``'*'`` gives ``(0, 0)``.
    """
    if isinstance(s, Element):
        s = s._format_element()
    if not s or s == '*':
        return (0, 0)
    if isinstance(s, int):
        return (0, s)
    for keyword, series in (('odd', (2, 1)), ('even', (2, 0)), ('n', (1, 0))):
        if s == keyword:
            return series
    if 'n' not in s:
        return (0, int(s))

    def to_int(text, default):
        # Empty text takes the default; a bare sign means +1 or -1.
        if not text:
            return default
        if text in ('-', '+'):
            return int(text + '1')
        return int(text)

    step_text, offset_text = s.split('n', 1)
    step = to_int(step_text, 1)
    offset = to_int(offset_text, 0)
    return (step, offset)
# Tokenizer patterns: a whitespace run, a /* ... */ comment (may span
# lines), and an "an+b" style count such as "2n+1" or "-n".
_whitespace_re = re.compile(r'\s+')
_comment_re = re.compile(r'/\*.*?\*/', re.S)
_count_re = re.compile(r'[+-]?\d*n(?:[+-]\d+)?')
def tokenize(s):
    """Yield the tokens of the CSS selector ``s``.

    Comments are removed first.  Yields Symbol for names and ``an+b``
    counts, String for quoted strings and Token for punctuation and
    operators.  Whitespace is skipped, except that a ``' '`` Token is
    emitted before a ``.`` or ``#`` that follows whitespace starting after
    offset 0.
    """
    pos = 0
    s = _comment_re.sub('', s)
    while True:
        match = _whitespace_re.match(s, pos = pos)
        if match:
            preceding_whitespace_pos = pos
            pos = match.end()
        else:
            preceding_whitespace_pos = 0
        if pos >= len(s):
            return
        match = _count_re.match(s, pos = pos)
        # A lone "n" is left to the ordinary symbol handling below.
        if match and match.group() != 'n':
            sym = s[pos:match.end()]
            yield Symbol(sym, pos)
            pos = match.end()
            continue
        c = s[pos]
        c2 = s[pos:pos + 2]
        # Two-character operators take precedence over single characters.
        if c2 in ('~=', '|=', '^=', '$=', '*=', '::', '!='):
            yield Token(c2, pos)
            pos += 2
            continue
        if c in '>+~,.*=[]()|:#':
            if c in '.#' and preceding_whitespace_pos > 0:
                yield Token(' ', preceding_whitespace_pos)
            yield Token(c, pos)
            pos += 1
            continue
        if c == '"' or c == "'":
            old_pos = pos
            (sym, pos) = tokenize_escaped_string(s, pos)
            yield String(sym, old_pos)
            continue
        old_pos = pos
        (sym, pos) = tokenize_symbol(s, pos)
        yield Symbol(sym, old_pos)
def tokenize_escaped_string(s, pos):
    """Read the quoted string that starts at ``s[pos]``.

    ``s[pos]`` is taken as the quote character.  Returns
    ``(text, next_pos)`` where text has its backslash escapes decoded and
    next_pos is the offset just past the closing quote.

    Raises SelectorSyntaxError when no usable closing quote is found.
    """
    quote = s[pos]
    pos = pos + 1
    start = pos
    while True:
        end = s.find(quote, pos)
        if end == -1:
            raise SelectorSyntaxError('Expected closing %s for string in: %r' % (quote, s[start:]))
        result = s[start:end]
        try:
            result = result.encode('ASCII', 'backslashreplace').decode('unicode_escape')
        except UnicodeDecodeError:
            # The text up to this quote does not decode (presumably the
            # quote itself was backslash-escaped); try the next quote.
            pos = end + 1
        else:
            return (result, end + 1)
# Any character that cannot be part of a symbol: not a word character,
# not a backslash and not a hyphen.
_illegal_symbol = re.compile(r'[^\w\\-]', re.UNICODE)


def tokenize_symbol(s, pos):
    """Read the symbol that starts at ``s[pos]``.

    Returns ``(symbol, next_pos)``.  The symbol runs up to the first
    character matched by ``_illegal_symbol``; in that case its backslash
    escapes are decoded.  When no such character follows, the rest of
    ``s`` is returned as is.

    Raises SelectorSyntaxError when the character at ``pos`` cannot start
    a symbol, or when the escapes cannot be decoded.
    """
    match = _illegal_symbol.search(s, pos = pos)
    if match is None:
        return (s[pos:], len(s))
    end = match.start()
    if end == pos:
        # Returning an empty symbol here would leave the position
        # unchanged, so the caller could never make progress.
        raise SelectorSyntaxError('Unexpected symbol: %r at %s' % (s[pos], pos))
    result = s[pos:end]
    try:
        result = result.encode('ASCII', 'backslashreplace').decode('unicode_escape')
    except UnicodeDecodeError:
        # Imported here because the module does not import sys at top level.
        import sys
        e = sys.exc_info()[1]
        raise SelectorSyntaxError('Bad symbol %r: %s' % (result, e))
    return (result, end)
class TokenStream(object):
    """Token iterator with a single token of look-ahead.

    ``used`` collects every token handed out by ``next()``.  Both
    ``next()`` and ``peek()`` return None once the tokens run out.
    """

    def __init__(self, tokens, source = None):
        self.source = source
        self.used = []
        self.peeked = None
        self._peeking = False
        self.tokens = iter(tokens)
        # Iterators expose either ``next`` or ``__next__`` depending on
        # the interpreter version.
        if hasattr(self.tokens, 'next'):
            self.next_token = self.tokens.next
        else:
            self.next_token = self.tokens.__next__

    def next(self):
        """Return the next token, using up a peeked one first."""
        if not self._peeking:
            try:
                token = self.next_token()
            except StopIteration:
                return None
            self.used.append(token)
            return token
        self._peeking = False
        self.used.append(self.peeked)
        return self.peeked

    def __iter__(self):
        # Iterate by calling next() until it returns None.
        return iter(self.next, None)

    def peek(self):
        """Return the next token without consuming it."""
        if self._peeking:
            return self.peeked
        try:
            self.peeked = self.next_token()
        except StopIteration:
            return None
        self._peeking = True
        return self.peeked